In each image stack, there are 44 images and only 3 are considered "acceptable" focus. This means the "acceptable" class has a much smaller population than the out-of-focus classes. In previous rounds of training the model I did not correct for this, allowing the difference in population size to bias the training. In the 3-category model (below, acceptable, and above) this had a strong effect, causing the model to miss every single acceptable image despite the model's apparent high accuracy. This bias is not as marked in the 5- and 7-category models, since the out-of-focus images are split into more separate classes, but there are still more images in each out-of-focus class than in the acceptable class. In this notebook I train the model on a set of images in which the bias has been corrected, to see if it improves accuracy overall and accuracy in identifying acceptable images as acceptable.
# Quick check to see how many images there are per class
def count_images(file_path):
    """Count how many .png images each class folder under *file_path* holds.

    Parameters
    ----------
    file_path : str or Path
        Directory whose immediate subdirectories are class folders.

    Returns
    -------
    tuple[list[int], list[str]]
        Parallel lists: per-class image counts and the class folder names,
        in sorted (alphabetical) order.
    """
    import os
    from pathlib import Path
    image_counter = []
    class_names = []
    for class_name in sorted(os.listdir(file_path)):
        # Skip .DS_Store and any other hidden entries (macOS metadata)
        if class_name.startswith('.'):
            continue
        class_names.append(class_name)
        # Make a Path to the class directory
        class_dir = Path(file_path) / class_name
        # BUG FIX: count only .png images so hidden files inside the class
        # folder (e.g. .DS_Store) do not inflate the count. Extend the glob
        # pattern to work with other image types.
        image_counter.append(len(list(class_dir.glob('*.png'))))
    return image_counter, class_names
# Tally per-class image counts for both data splits
train_path = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/train'
test_path = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/test'

# Training split: class names alongside the per-class counts
train_counts, class_names = count_images(train_path)
print(class_names)
print(train_counts)

# Test split: same check
test_counts, class_names = count_images(test_path)
print(class_names)
print(test_counts)
In the test images, there are 60 acceptable samples compared to 348 and 292 in the two most out of focus classes.
# Import the image processing functions and class
from image_import import process_image, de_process_image, wormDataset
# Import all needed libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
# These last two are used to save info about how the training progressed
import pickle
import datetime
# Full paths to the main image directories (one class per subfolder)
train_dir = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/train'
test_dir = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/microscope_images_5cat/test'

# Batch sizes for the train/test loaders
num_train = 10
num_test = 10

# Normalization statistics expected by the pretrained network
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]

# Build the custom datasets
traindata = wormDataset(train_dir, means, stds)
testdata = wormDataset(test_dir, means, stds)

# Report the classes detected in the training data
class_names = traindata.classes
print('Detected ' + str(len(class_names)) + ' classes in training data')
print(class_names)

# Report how many samples each dataset holds
print('Traindata length = ' + str(len(traindata)) + ', testdata length = ' + str(len(testdata)))
# Load using the non-corrected (unbalanced) set for comparison.
# Plain shuffled DataLoaders: each sample is seen once per epoch, so class
# population sizes are NOT balanced here.
trainloader = torch.utils.data.DataLoader(traindata, batch_size=num_train, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size=num_test, shuffle=True)
# Print out how many images are in the trainloader and testloader
print("Train batch size = " + str(num_train) + ', test batch size = ' + str(num_test))
# BUG FIX: corrected typo in the printed label ('Trainloder' -> 'Trainloader')
print('Trainloader length = ' + str(len(trainloader)) + ', testloader length = ' + str(len(testloader)))
# Walk the test loader and tally how many samples carry each label
label_dict = {}
for images, labels in testloader:
    for label in labels:
        # .item() extracts the plain Python number from the 0-d tensor
        key = label.item()
        label_dict[key] = label_dict.get(key, 0) + 1
label_dict
The code for the balance correcting sampler is heavily based on this repo: https://github.com/ufoym/imbalanced-dataset-sampler
Basically, the sampler is a data structure that takes the dataset, and then the sampler(dataset) is passed to the dataloader. I re-wrote the sampler to accept the wormDataset.
import torch
from torch.utils import data
class wormDatasetSampler(torch.utils.data.sampler.Sampler):
    """Samples elements randomly from an imbalanced dataset, weighting each
    sample by the inverse of its class population so every class is drawn
    with approximately equal frequency.

    Heavily based on https://github.com/ufoym/imbalanced-dataset-sampler,
    re-written to accept the wormDataset.

    Arguments:
        dataset: object where dataset[idx] is an (image, label) pair and
            len(dataset) is defined
        indices (list, optional): a list of indices to restrict sampling to;
            defaults to every index in the dataset
        num_samples (int, optional): number of samples to draw per epoch;
            defaults to the number of indices

    To use the sampler, add sampler=wormDatasetSampler(dataset) to the
    DataLoader. Note that shuffle must be False when a sampler is supplied;
    the weighted multinomial draw already randomizes the order.
    """
    def __init__(self, dataset, indices=None, num_samples=None):
        # BUG FIX: honor the optional indices/num_samples arguments; they
        # were previously accepted but silently overwritten.
        self.indices = list(range(len(dataset))) if indices is None else indices
        self.num_samples = len(self.indices) if num_samples is None else num_samples
        # Fetch each label exactly once — dataset[idx] decodes a full image,
        # so the original's two passes over the dataset doubled that cost.
        labels = [self._get_label(dataset, idx) for idx in self.indices]
        # Map label -> number of samples carrying that label
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1
        # Weight for each sample: inverse of its class population. The
        # DataLoader draws indices from this distribution via __iter__.
        weights = [1.0 / label_to_count[label] for label in labels]
        self.weights = torch.DoubleTensor(weights)

    def _get_label(self, dataset, idx):
        # Each wormDataset sample is (image_array, class_label)
        sample = dataset[idx]
        return sample[1]

    def __iter__(self):
        # Draw num_samples indices with replacement, weighted for balance
        return (self.indices[i] for i in torch.multinomial(
            self.weights, self.num_samples, replacement=True))

    def __len__(self):
        return self.num_samples
# Re-load the data, this time wrapping each dataset in the balancing sampler.
# shuffle must be False whenever a sampler is supplied; shuffling is the
# sampler's job (its multinomial draw randomizes the order).
trainloader = torch.utils.data.DataLoader(traindata, sampler = wormDatasetSampler(traindata), batch_size=num_train, shuffle=False)
testloader = torch.utils.data.DataLoader(testdata, sampler = wormDatasetSampler(testdata), batch_size=num_test, shuffle=False)

# Report batch sizes and how many batches each loader yields
print(f"Train batch size = {num_train}, test batch size = {num_test}")
print(f'Trainloader length = {len(trainloader)}, testloader length = {len(testloader)}')
# NOTE: this cell has previously had issues — the wormDataset has trouble
# iterating over itself.
# Tally the labels coming out of the sampler-backed test loader to confirm
# the class balance correction took effect.
label_dict = {}
for images, labels in testloader:
    for label in labels:
        # .item() extracts the plain Python number from the 0-d tensor
        key = label.item()
        label_dict[key] = label_dict.get(key, 0) + 1
label_dict
Now that the data has been sampled using wormDatasetSampler, there are a similar number of samples per class and population size will not be a source of bias.
%%capture
# Prevent printing out the model architecture
# Check if cuda is available, and set pytorch to run on GPU or CPU as appropriate
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('Cuda available, running on GPU')
else:
    device = torch.device("cpu")
    print('Cuda is not available, running on CPU')
# Give the user a message so they know what is going on

# Start from an ImageNet-pretrained Resnet50; only the classifier head
# (model.fc) is replaced and trained below.
model = models.resnet50(pretrained=True)
#print(model)
# Printing the model shows some of the internal layers, not expected to
# understand these but neat to see

# Freeze the pre-trained layers, no need to update feature detection
for param in model.parameters():
    param.requires_grad = False

# Get the number of features the model expects in the final fully connected
# layer, this is different in different models
num_ftrs = model.fc.in_features
# Re-define the final fully connected layer (model.fc, fc = fully connected)
model.fc = nn.Sequential(nn.Linear(num_ftrs, 512), # 2048 inputs to 512 outputs
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         # The next line needs to be modified for the number of classes
                         # in the data set. For the microscope images I currently have
                         # five classes, so there are 5 outputs
                         nn.Linear(512, 5), # 512 inputs to 5 outputs
                         nn.LogSoftmax(dim=1))

# NLLLoss pairs with the LogSoftmax output layer defined above
criterion = nn.NLLLoss()
# Only the new head's parameters are optimized; the frozen layers are skipped
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
model.to(device)
# Train the network: only the new classifier head updates (rest is frozen)
epochs = 2
steps = 0
running_loss = 0
print_every = 10  # evaluate on the test set every 10 training batches
train_losses, test_losses, accuracy_tracker = [], [], []
for epoch in range(epochs):
    for inputs, labels in trainloader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        logps = model.forward(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            model.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    logps = model.forward(inputs)
                    batch_loss = criterion(logps, labels)
                    test_loss += batch_loss.item()
                    # Convert log-probabilities back to probabilities
                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            # BUG FIX: the tracked train loss previously divided running_loss
            # by len(trainloader) while the printed value divided by
            # print_every. running_loss only accumulates over print_every
            # batches, so print_every is the correct denominator for both.
            train_losses.append(running_loss/print_every)
            test_losses.append(test_loss/len(testloader))
            accuracy_tracker.append(accuracy/len(testloader))
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(testloader):.3f}.. "
                  f"Test accuracy: {accuracy/len(testloader):.3f}")
            running_loss = 0
            model.train()
# Save the fine-tuned model for later evaluation
torch.save(model, 'resnet50_5cat_unbiased.pth')
# Save the information about how training went
# Get a unique date and time to id this training round
now = datetime.datetime.now()
time_string = ('-').join([str(now.hour), str(now.minute)])
date_string = ('-').join([str(now.month), str(now.day), str(now.year)])
file_name = ('_').join(['resnet50_5cat_training', date_string, time_string])
# BUG FIX: the original called 'fileObject.close' without parentheses (a
# no-op) and then close() again; a context manager closes the file exactly
# once, even if pickling raises.
training_data = [train_losses, test_losses, accuracy_tracker]
with open(file_name, 'wb') as fileObject:
    pickle.dump(training_data, fileObject)

# Plot how the training and validation losses evolved over the run
plt.plot(train_losses, label='Training loss')
plt.plot(test_losses, label='Validation loss')
plt.legend(frameon=False)
plt.show()
# Evaluate on all images in the test loader
correct = 0
total = 0
# BUG FIX: the training loop leaves the model in train mode, so dropout was
# active during this evaluation; switch to eval mode first.
model.eval()
with torch.no_grad():
    for images, labels in testloader:
        # BUG FIX: move the batch to the model's device; the original left
        # images/labels on the CPU, which fails when the model is on the GPU.
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on ' + str(total) + ' test images: %d %%' % (
    100 * correct / total))
Accuracy of the network trained without correcting for bias was 74%, this is slightly higher but not by much.
# Evaluate on all images in the test loader, building a 5x5 confusion matrix
confusion_matrix = np.zeros((5, 5))
class_names = testdata.classes
# BUG FIX: 'total' previously leaked in from the earlier accuracy cell;
# initialize it here so this cell is self-contained and re-runnable.
total = 0
# NOTE(review): batches are not moved to `device` here; this works on CPU
# but would fail with the model on GPU — confirm before running there.
with torch.no_grad():
    for data in testloader:
        # This is processing in batches, the number of things in images and
        # labels is the same as the batch size
        images, labels = data
        outputs = model(images)
        # torch.max with a dim returns a (values, indices) named tuple
        predicted = torch.max(outputs.data, 1)
        num_labels = labels.size(0)
        total += num_labels
        for i in range(num_labels):  # Iterate through the labels in the batch
            # Increase the cell corresponding to the label / prediction pair by one
            confusion_matrix[labels[i], predicted.indices[i]] += 1
print(confusion_matrix)
# Render the confusion matrix as a heat map with labeled axes
plt.title('5 Category Confusion Matrix')
plt.ylabel('Labels (True Class)')
plt.xlabel('Predicted')
plt.imshow(confusion_matrix, cmap='hot', interpolation='nearest')
plt.show()

# Overall accuracy: correct predictions sit on the main diagonal
percent_accurate = np.trace(confusion_matrix) / confusion_matrix.sum() * 100
print(percent_accurate)
# Row 2 is true acceptable, column 2 is predicted acceptable
acceptable = 2
# Errors in which an acceptable image is mistaken for out of focus
# (row total minus the correctly-classified cell)
(confusion_matrix[acceptable, :].sum() - confusion_matrix[acceptable, acceptable]) / confusion_matrix.sum() * 100
# Errors in which an out of focus image is mistaken for acceptable
# (column total minus the correctly-classified cell)
(confusion_matrix[:, acceptable].sum() - confusion_matrix[acceptable, acceptable]) / confusion_matrix.sum() * 100
Based on this metric the rate of failure errors in both directions is MUCH higher in the unbiased model than in the other models.
Something I realized here is that I've been calculating these accuracies relative to all the data in the confusion matrix. After balancing, a larger share of the samples are truly acceptable or get classified as acceptable, which inflates the apparent rate of failure errors.
# Percent of failure errors: any mistake involving the acceptable class in
# either direction (row total + column total, minus the correct cell twice)
acceptable_row = confusion_matrix[acceptable, :].sum()
acceptable_col = confusion_matrix[:, acceptable].sum()
percent_failure = (acceptable_row + acceptable_col - 2 * confusion_matrix[acceptable, acceptable]) / confusion_matrix.sum() * 100
print(percent_failure)

# Errors in which the predicted class is a neighbor of the true class.
# The off-diagonals immediately above/below the main diagonal hold the
# one-class-off predictions.
neighbors_pos = np.diagonal(confusion_matrix[1:, :4])
np.sum(neighbors_pos) / np.sum(confusion_matrix) * 100
neighbors_neg = np.diagonal(confusion_matrix[:4, 1:])
np.sum(neighbors_neg) / np.sum(confusion_matrix) * 100
percent_neighbor = (neighbors_pos.sum() + neighbors_neg.sum()) / confusion_matrix.sum() * 100
print(percent_neighbor)
The percentage of neighbor errors is higher, too.
# Errors in which the model gets the direction wrong: thinks the plane of
# focus is above best focus when it is below, and vice-versa. These sit on
# the anti-diagonal (upper right to lower left), exposed by flipping the
# matrix left-right before taking the diagonal.
np.diagonal(np.fliplr(confusion_matrix))
For some reason ALL the opposite errors occur in the upper right corner - this corresponds to images that are out of focus in the negative direction being classed as out of focus positive. The model never makes a mistake in the opposite direction.
# The center of the anti-diagonal is the acceptable/acceptable cell — a
# correct prediction — so subtract it back out of the opposite-error count.
percent_opposite = (np.fliplr(confusion_matrix).diagonal().sum() - confusion_matrix[acceptable, acceptable]) / confusion_matrix.sum() * 100
print(percent_opposite)

# Whatever remains after the named error categories
percent_other = 100 - percent_failure - percent_neighbor - percent_opposite - percent_accurate
print(percent_other)
# Bar chart summarizing prediction outcomes by category
bars = ['Accurate', 'Failure', 'Neighbor', 'Opposite', 'Other']
percentages = [percent_accurate, percent_failure, percent_neighbor, percent_opposite, percent_other]
y_pos = np.arange(len(bars))
plt.bar(y_pos, percentages)
plt.title('Results by Category')
plt.ylabel('% of Total Predictions')
plt.xticks(y_pos, bars)
plt.ylim(0, 100)
plt.show()
Where the effect of the bias really becomes marked is in classifying across an entire image stack. In the following section of the notebook I do this with the unbiased classifier and then with a 5-category classifier that did not have the populations adjusted (from "Train Resnet50 with 5 Classes").
# Import libraries
# Import the image processing functions and class
from image_import import process_image, de_process_image, wormDataset
# Import all needed libraries
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import time
from pathlib import Path
import statistics
# Define functions
def get_prediction(image_path, means, stds):
    """Import and process one image, then run the (global) model on it.

    Returns (prediction, model_start, model_end): the index of the winning
    class — the focus classes are numbered, so the index doubles as the
    class name — plus timestamps bracketing the forward pass.
    """
    with torch.no_grad():
        image = process_image(image_path, means, stds)
        # Add a "dummy" batch dimension of 1 — this would be the batch size
        # in a multi-image set
        image.unsqueeze_(0)
        model_start = time.time()
        output = model(image)
        model_end = time.time()
        # Index of the highest-scoring class, extracted as a plain number
        _, prediction_index = torch.max(output.data, 1)
        prediction = prediction_index.item()
        return prediction, model_start, model_end
# Range limits for the focus stack
# NOTE(review): [0, 44] inclusive yields 45 planes, but the stacks are
# described as holding 44 images — confirm the stack really has files 00-44.
plane_range = [0, 44]

# Pick GPU if available, otherwise CPU, and tell the user which is in use
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('Cuda available, running on GPU')
else:
    device = torch.device("cpu")
    print('Cuda is not available, running on CPU')

## load the pre-trained model
#model_path = '/mnt/purplearray/Vera/vera_autofocus/compare_num_classes/resnet50_5cat.pth'
#model_path = '/Users/zplab/Desktop/VeraPythonScripts/vera_autofocus/compare_num_classes/resnet50_5cat.pth'
#model = torch.load(model_path)

# Put the model in eval mode for inference
model.eval()
# Start logging time
start = time.time()

# Means and stds for Resnet50 — update these if using a different model.
# Enter the class names list if needed: for some models the folders load in
# order but in others they don't; check the Jupyter notebook that documents
# training the model to verify.
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]

# Index of the class "acceptable" (in-focus images). This index changes
# with how many classes the classifier has.
acceptable = 2

prediction_list = []
plane_list = list(range(plane_range[0], (plane_range[1] + 1)))

# Path to a focus stack (mod to /mnt/purplearray/ for linux)
#focus_stack = Path('/mnt/purplearray/Pittman_Will/20190521_cyclo_dead/06/2019-05-23t0923 focus')
focus_stack = Path('/Volumes/purplearray/Pittman_Will/20190521_cyclo_dead/06/2019-06-08t2148 focus')

# Use this for testing functions
#image_path = '/Volumes/purplearray/Pittman_Will/20190521_cyclo_dead/06/2019-06-14t1105 focus/30.png'

model_starts = []
model_ends = []
for current_plane in plane_list:  # Iterate through every image in the stack
    # Image files are named with zero-padded plane numbers (e.g. '05.png');
    # :02d formatting replaces the original manual if/else padding.
    image_file = f'{int(current_plane):02d}.png'
    image_path = focus_stack / image_file
    prediction, model_start, model_end = get_prediction(image_path, means, stds)
    model_starts.append(model_start)
    model_ends.append(model_end)
    # Capture the current plane + prediction
    prediction_list.append(prediction)

# Collect the planes whose prediction was the acceptable class
acceptable_planes = []
for plane, prediction in zip(plane_list, prediction_list):
    if prediction == acceptable:
        acceptable_planes.append(plane)
print(acceptable_planes)

# BUG FIX: statistics.median raises an opaque StatisticsError on an empty
# list; fail with a clear message if no plane was classified acceptable.
if not acceptable_planes:
    raise RuntimeError('No plane in the stack was classified as acceptable focus')
# Take the median acceptable focus plane as the best
best_focus = int(statistics.median(acceptable_planes))
# Stop logging execution time
end = time.time()
run_time = end - start

# Report the chosen plane and overall run time. After the best plane has
# been identified the user can view the image deemed acceptable.
print('Found best: ' + str(best_focus))
print('Run time: ' + str(run_time))

# Total time spent inside the model, summed across all planes
model_time = np.sum(np.array(model_ends) - np.array(model_starts))
print('Time in model: ', model_time)

# Build the path to the best-focus image (zero-padded file name)
if best_focus > 9:
    image_file = str(int(best_focus)) + '.png'
else:
    image_file = '0' + str(int(best_focus)) + '.png'
image_path = focus_stack / image_file
# Take a look at the "best focus"
%matplotlib inline
# Import libraries
from PIL import Image
import numpy as np
from matplotlib.pyplot import imshow, hist
from IPython.display import Image
Image(image_path)